import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from jianshu.items import JianshuItem


class JsSpiderSpider(CrawlSpider):
    """Crawl jianshu.com article pages and emit one ``JianshuItem`` per page.

    The crawl starts from a single article URL. Every extracted link whose
    URL matches ``/p/`` followed by 12 lowercase-alphanumeric characters is
    both followed and handed to :meth:`parse_item`.
    """

    name = 'js_spider'
    allowed_domains = ["jianshu.com"]
    start_urls = ['https://www.jianshu.com/p/78889b76dbd3']

    rules = (
        # follow=True keeps extracting links from the matched pages as well,
        # so the crawl spreads from article to article.
        Rule(LinkExtractor(allow=r'.*/p/[0-9a-z]{12}'), callback='parse_item', follow=True),
    )

    def parse_item(self, response):
        """Extract the title, article markup and URL from a matched page.

        Args:
            response: The downloaded page for a URL matched by ``rules``.

        Yields:
            JianshuItem: An item with ``title``, ``content`` and ``url`` set.
            ``title`` / ``content`` hold whatever the selectors return for
            the page, including an empty result when nothing matches.
        """
        # Use the spider logger instead of print() so the trace respects
        # Scrapy's LOG_LEVEL / LOG_FILE settings and keeps stdout clean.
        self.logger.debug('Parsing article page: %s', response.url)

        # NOTE(review): the class names below look auto-generated by the
        # site's front-end build; presumably they need updating whenever the
        # page markup changes -- confirm against a live page.
        title = response.xpath('//h1[@class="_1RuRku"]/text()').get()
        # Selects the <article> element itself rather than its text().
        content = response.xpath('//article[@class="_2rhmJa"]').get()

        yield JianshuItem(
            title=title,
            content=content,
            url=response.url,
        )